{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/tomasonjo/blogs/blob/master/Spacy_Neo4j_Gutenberg_Book/Spacy%20NER%20with%20Neo4j%20Clustering%20on%20Gutenberg%20book.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "* Updated to GDS 2.0 version\n",
        "* Link to original blog post: https://towardsdatascience.com/network-analysis-of-prisoners-of-zenda-book-with-spacy-and-neo4j-b0839a640105"
      ],
      "metadata": {
        "id": "PLkTF151vvYa"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install neo4j spacy\n",
        "!python -m spacy download en_core_web_lg\n"
      ],
      "metadata": {
        "id": "xQuOPNtrv5tl",
        "outputId": "85cb8c38-271c-482a-8f86-6a3e217b2193",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting neo4j\n",
            "  Downloading neo4j-4.4.2.tar.gz (89 kB)\n",
            "\u001b[?25l\r\u001b[K     |███▋                            | 10 kB 16.4 MB/s eta 0:00:01\r\u001b[K     |███████▎                        | 20 kB 11.0 MB/s eta 0:00:01\r\u001b[K     |███████████                     | 30 kB 8.9 MB/s eta 0:00:01\r\u001b[K     |██████████████▋                 | 40 kB 8.3 MB/s eta 0:00:01\r\u001b[K     |██████████████████▎             | 51 kB 4.3 MB/s eta 0:00:01\r\u001b[K     |██████████████████████          | 61 kB 5.0 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▋      | 71 kB 5.4 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▎  | 81 kB 5.6 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 89 kB 3.9 MB/s \n",
            "\u001b[?25hRequirement already satisfied: spacy in /usr/local/lib/python3.7/dist-packages (2.2.4)\n",
            "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from neo4j) (2018.9)\n",
            "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.23.0)\n",
            "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.0)\n",
            "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.1.3)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (4.64.0)\n",
            "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.9.1)\n",
            "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (7.4.0)\n",
            "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.21.5)\n",
            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (3.0.6)\n",
            "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.4.1)\n",
            "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.5)\n",
            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.0.6)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy) (57.4.0)\n",
            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.6)\n",
            "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy) (4.11.3)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (3.8.0)\n",
            "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (4.1.1)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2021.10.8)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.10)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.3)\n",
            "Building wheels for collected packages: neo4j\n",
            "  Building wheel for neo4j (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for neo4j: filename=neo4j-4.4.2-py3-none-any.whl size=115365 sha256=375f8f269c3b4b2bd5e68f1c77965807b84099501344af227bf704360c6d44e3\n",
            "  Stored in directory: /root/.cache/pip/wheels/10/d6/28/95029d7f69690dbc3b93e4933197357987de34fbd44b50a0e4\n",
            "Successfully built neo4j\n",
            "Installing collected packages: neo4j\n",
            "Successfully installed neo4j-4.4.2\n",
            "Collecting en_core_web_lg==2.2.5\n",
            "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz (827.9 MB)\n",
            "\u001b[K     |████████████████████████████████| 827.9 MB 1.1 MB/s \n",
            "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from en_core_web_lg==2.2.5) (2.2.4)\n",
            "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.21.5)\n",
            "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.23.0)\n",
            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.6)\n",
            "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.0)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (4.64.0)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (57.4.0)\n",
            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.6)\n",
            "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (7.4.0)\n",
            "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.1.3)\n",
            "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.4.1)\n",
            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.0.6)\n",
            "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.5)\n",
            "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.9.1)\n",
            "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (4.11.3)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.8.0)\n",
            "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (4.1.1)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2021.10.8)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.4)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2.10)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (1.24.3)\n",
            "Building wheels for collected packages: en-core-web-lg\n",
            "  Building wheel for en-core-web-lg (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for en-core-web-lg: filename=en_core_web_lg-2.2.5-py3-none-any.whl size=829180942 sha256=575aa8e998aefd0d0fd8abaeee8b7f4f7353dd86950646d465af448f2e44ffbf\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-mrwx1m4p/wheels/11/95/ba/2c36cc368c0bd339b44a791c2c1881a1fb714b78c29a4cb8f5\n",
            "Successfully built en-core-web-lg\n",
            "Installing collected packages: en-core-web-lg\n",
            "Successfully installed en-core-web-lg-2.2.5\n",
            "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
            "You can now load the model via spacy.load('en_core_web_lg')\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GOVTfyZ2vtOt"
      },
      "source": [
        "Restart runtime before continuing in order for SpaCy to work\n",
        "\n",
        "# Data preprocessing"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "IzJm4AshvtOx"
      },
      "outputs": [],
      "source": [
        "# https://www.gutenberg.org/ebooks/95 Prisoner of Zelda\n",
        "\n",
        "# Fetch the data\n",
        "target_url = 'https://www.gutenberg.org/files/95/95-0.txt'\n",
        "import urllib.request\n",
        "data = urllib.request.urlopen(target_url)\n",
        "raw_data = data.read().decode('utf8').strip()\n",
        "\n",
        "# Preprocess text into chapters \n",
        "import re\n",
        "chapters = re.sub('[^A-z0-9 -]', ' ', raw_data).split('CHAPTER')[1:]\n",
        "chapters[-1] = chapters[-1].split('End of the Project Gutenberg EBook')[0]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "182mC1YtvtOz"
      },
      "source": [
        "# Import into Neo4j"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "MPOKdT96vtOz"
      },
      "outputs": [],
      "source": [
        "# import spacy and load an NLP model\n",
        "import spacy\n",
        "nlp = spacy.load(\"en_core_web_lg\", disable=[\"tagger\", \"parser\"])\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "OxeIYL9kvtOz"
      },
      "outputs": [],
      "source": [
        "# Import Neo4j and define cypher queries\n",
        "import neo4j\n",
        "host = 'bolt://3.235.2.228:7687'\n",
        "user = 'neo4j'\n",
        "password = 'seats-drunks-carbon'\n",
        "\n",
        "driver = neo4j.GraphDatabase.driver(host, auth=(user, password))\n",
        "\n",
        "save_query =\"\"\"\n",
        "MERGE (p1:Person{name:$name1})\n",
        "MERGE (p2:Person{name:$name2})\n",
        "MERGE (p1)-[r:RELATED]-(p2)\n",
        "ON CREATE SET r.score = 1\n",
        "ON MATCH SET r.score = r.score + 1\"\"\"\n",
        "\n",
        "constraint_query=\"CREATE CONSTRAINT ON (p:Person) ASSERT p.name IS UNIQUE;\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "wzglMHIIvtO0",
        "outputId": "cb69fad5-405c-4795-cd45-d4b9c1d7c250",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Rassendylls Elphberg\n",
            "Rudolf Rose  \n",
            "Rassendylls Robert\n",
            "Robert Robert\n",
            "Rudolf Rose  \n",
            "Robert Good heavens   \n",
            "Good heavens    Rudolf\n",
            "Rudolf Robert\n",
            "Robert Rudolf\n",
            "Elphberg Rassendylls\n",
            "Burlesdon Strelsau\n",
            "Burlesdon Amelia\n",
            "James   Burlesdon\n",
            "James   Rassendyll\n",
            "Burlesdon Rassendyll\n",
            "Burlesdon a Knight of the Garter\n",
            "Rassendyll a Knight of the Garter\n",
            "Rassendyll Rudolf\n",
            "a Knight of the Garter Rudolf\n",
            "Rose   Nonsense   \n",
            "Jacob Jacob\n",
            "Jacob Rudolf\n",
            "Elphberg Elphberg\n",
            "Elphberg Rudolf\n",
            "Rudolf Strelsau\n",
            "Bob Rose\n"
          ]
        }
      ],
      "source": [
        "# Run the analysis of the first chapter\n",
        "c = chapters[0]\n",
        "# Get involved\n",
        "doc=nlp(c)\n",
        "\n",
        "with driver.session() as session:\n",
        "    #define constraint\n",
        "    session.run(constraint_query)\n",
        "    # Extract Person labels\n",
        "    involved = list(set([ent.text for ent in doc.ents if ent.label_=='PERSON']))\n",
        "    # Preprocess text\n",
        "    decode = dict()\n",
        "    for i,x in enumerate(involved):\n",
        "        # Get mapping\n",
        "        decode['$${}$$'.format(i)] = x\n",
        "        # Preprocess text\n",
        "        c = c.replace(x,' $${}$$ '.format(i))\n",
        "        \n",
        "    # Split chapter into words\n",
        "    ws = c.split()\n",
        "    l = len(ws)\n",
        "    # Iterate through words\n",
        "    for wi,w in enumerate(ws):\n",
        "        # Skip if the word is not a person\n",
        "        if not w[:2] == '$$':\n",
        "            continue\n",
        "        # Check next x words for any involved person\n",
        "        x = 14\n",
        "        for i in range(wi+1,wi+x):\n",
        "            # Avoid list index error\n",
        "            if i >= l:\n",
        "                break\n",
        "            # Skip if the word is not a person\n",
        "            if not ws[i][:2] == '$$':\n",
        "                continue\n",
        "            # Store to Neo4j\n",
        "            params = {'name1':decode[ws[wi]],'name2':decode[ws[i]]}\n",
        "            session.run(save_query, params)\n",
        "            print(decode[ws[wi]],decode[ws[i]])\n",
        "        "
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QJQ6rAU-vtO2"
      },
      "source": [
        "# Graph Analysis"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "dKoppRH-vtO2"
      },
      "outputs": [],
      "source": [
        "# Project the graph\n",
        "graph_projection = \"\"\"\n",
        "CALL gds.graph.project('ch1', 'Person', {RELATED:{orientation:'UNDIRECTED'}})\n",
        "\"\"\"\n",
        "\n",
        "# Run pagerank and louvain algorithm\n",
        "pagerank =\"\"\"\n",
        "CALL gds.pageRank.write('ch1',{writeProperty:'pagerank'})\n",
        "\"\"\"\n",
        "louvain = \"\"\"\n",
        "CALL gds.louvain.write('ch1',{writeProperty:'louvain'})\n",
        "\"\"\"\n",
        "\n",
        "drop_graph = \"\"\"\n",
        "CALL gds.graph.drop('ch1')\n",
        "\"\"\"\n",
        "\n",
        "with driver.session() as session:\n",
        "    session.run(graph_projection)\n",
        "    session.run(pagerank)\n",
        "    session.run(louvain)\n",
        "    session.run(drop_graph)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mIU8L_lYvtO3"
      },
      "source": [
        "# Results"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "tIBn7tj2vtO7"
      },
      "outputs": [],
      "source": [
        "# Import libraries\n",
        "import pandas as pd\n",
        "\n",
        "def read_query(query, params={}):\n",
        "    with driver.session() as session:\n",
        "        result = session.run(query, params)\n",
        "        return pd.DataFrame([r.values() for r in result], columns=result.keys())"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluate pagerank\n",
        "read_query(\"\"\"\n",
        "MATCH (c:Person)\n",
        "RETURN c.name AS character, c.pagerank AS score\n",
        "ORDER BY score DESC LIMIT 5\n",
        "\"\"\")"
      ],
      "metadata": {
        "id": "UqPWBHBkx-J-",
        "outputId": "4c9db6b6-4748-4b2d-8c69-8f0155ba0938",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        }
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "    character     score\n",
              "0      Rudolf  2.234279\n",
              "1   Burlesdon  1.550467\n",
              "2      Robert  1.366045\n",
              "3  Rassendyll  1.177921\n",
              "4    Elphberg  1.115947"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-9fa82270-a97c-4a70-afd1-0222c40ea2d4\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>character</th>\n",
              "      <th>score</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Rudolf</td>\n",
              "      <td>2.234279</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Burlesdon</td>\n",
              "      <td>1.550467</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Robert</td>\n",
              "      <td>1.366045</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Rassendyll</td>\n",
              "      <td>1.177921</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Elphberg</td>\n",
              "      <td>1.115947</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9fa82270-a97c-4a70-afd1-0222c40ea2d4')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-9fa82270-a97c-4a70-afd1-0222c40ea2d4 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-9fa82270-a97c-4a70-afd1-0222c40ea2d4');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluate louvain\n",
        "read_query(\"\"\"\n",
        "MATCH (c:Person)\n",
        "RETURN c.louvain AS community, collect(c.name) AS members\n",
        "ORDER BY size(members) DESC\n",
        "\"\"\")"
      ],
      "metadata": {
        "id": "9Gm32eoLyE3Z",
        "outputId": "7b97b170-90bc-4cbd-9076-f91acd3b7edf",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 175
        }
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   community                                            members\n",
              "0          3     [Rudolf, Rose  , Strelsau, Nonsense   , Jacob]\n",
              "1         10  [Burlesdon, Amelia, James  , Rassendyll, a Kni...\n",
              "2          5   [Rassendylls, Elphberg, Robert, Good heavens   ]\n",
              "3         15                                        [Bob, Rose]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-30bd820c-4c9a-41fa-b0ca-4fedc4cdb93c\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>community</th>\n",
              "      <th>members</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>3</td>\n",
              "      <td>[Rudolf, Rose  , Strelsau, Nonsense   , Jacob]</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>10</td>\n",
              "      <td>[Burlesdon, Amelia, James  , Rassendyll, a Kni...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>5</td>\n",
              "      <td>[Rassendylls, Elphberg, Robert, Good heavens   ]</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>15</td>\n",
              "      <td>[Bob, Rose]</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-30bd820c-4c9a-41fa-b0ca-4fedc4cdb93c')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-30bd820c-4c9a-41fa-b0ca-4fedc4cdb93c button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-30bd820c-4c9a-41fa-b0ca-4fedc4cdb93c');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        ""
      ],
      "metadata": {
        "id": "OPJe0M-YyV5c"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.2"
    },
    "colab": {
      "name": "Spacy NER with Neo4j Clustering on Gutenberg book.ipynb",
      "provenance": [],
      "include_colab_link": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}